#### setting environment ####
require(quanteda)

# function to compute confidence intervals
confidence <- function(x, level = 0.95, upper = TRUE){
  x <- na.omit(x)
  n <- length(x)
  mean <- mean(x, na.rm = TRUE)
  SE <- sd(x, na.rm = TRUE) / sqrt(n)
  Q <- qnorm((1 - level) / 2, lower.tail = FALSE)
  if (upper == TRUE) {
    bound <- mean + Q * SE
  } else {
    bound <- mean - Q * SE
  }
  return(bound)
}

#### data preparation ####
# document-feature matrix of newspaper editorials on the security bills
load("Study1_dfm.Rdata")
# human-coded positions of the newspapers
humancoded.data <- read.csv("humancoded_positions.csv")

## number of editorials (Table 1)
table(docvars(Study1.editorial.dfm, "Newspaper"))

## prepare the newspaper-level document-feature matrix
# compress the document-feature matrix at the newspaper level
Study1.dfm <- dfm_group(Study1.editorial.dfm, groups = "Newspaper")
# remove words not used by two or more newspapers
Study1.dfm <- dfm_trim(Study1.dfm, min_docfreq = 2, docfreq_type = "count")

## size of the document-feature matrix
dim(Study1.dfm)

#### estimate the Wordfish model ####
set.seed(12345)
Study1.wordfish.result <- textmodel_wordfish(Study1.dfm, sparse = TRUE)

#### illustrate the results (Figure 1) ####
## positions of features
features <- data.frame(Word = Study1.wordfish.result$features, 
                       Polarity = Study1.wordfish.result$beta, 
                       Frequency = Study1.wordfish.result$psi)
label.words <- c("法案", "安保法案", "集団的自衛権", 
                 "憲法違反", "反対", "デモ", "廃案", 
                 "強化", "同盟", "抑止力", "意義")
change.label.words <- c("bills", "security bills", "right of collective self-defense", 
                        "unconstitutional", "protest", "demonstration", "scrap", 
                        "intensify", "alliance", "deterrent", "useful")
features$Label <- Study1.wordfish.result$features
features$Label[! features$Word %in% label.words] <- ""
for (i in 1:11) {
  features$Label[features$Word == label.words[i]] <- change.label.words[i]
}

## positions of newspapers
Study1.ideal.points <- data.frame(Newspaper = Study1.wordfish.result$docs, 
                                  Wordfish = Study1.wordfish.result$theta, 
                                  SE = Study1.wordfish.result$se.theta)
Study1.ideal.points.order <- order(Study1.ideal.points$Wordfish, decreasing = TRUE)

## human-coded positions
# average of editorial positions coded by three coders
humancoded.data$Ideology <- (humancoded.data$CoderA + 
                               humancoded.data$CoderB + 
                               humancoded.data$CoderC) / 3

# compute newspapers' positions by averaging editorial positions
newspaper.names <- unique(humancoded.data$Newspaper)
humancoded.positions <- data.frame(Newspaper = newspaper.names, 
                                   Mean = rep(NA, 10), 
                                   Upper = rep(NA, 10), 
                                   Lower = rep(NA, 10))
for (i in 1:10) {
  subset.data <- subset(humancoded.data, Newspaper == newspaper.names[i])
  humancoded.positions$Mean[i] <- mean(subset.data$Ideology)
  humancoded.positions$Upper[i] <- confidence(subset.data$Ideology, upper = TRUE)
  humancoded.positions$Lower[i] <- confidence(subset.data$Ideology, upper = FALSE)
}
humancoded.positions.order <- order(humancoded.positions$Mean, decreasing = TRUE)

## draw the figure
cairo_pdf("Figure_1.pdf",
          width = 10, height = 12, pointsize = 12, family = "Helvetica")
split.screen(c(2, 1))
split.screen(c(1, 2), screen = 2)
screen(1); plot(features$Polarity, features$Frequency, 
                xlab = "Word Weights", ylab = "Word Fixed Effect", 
                pch = 21, bg = "gray", col = "gray")
title("(a) Word weights and fixed effects from the Wordfish model")
text(features$Polarity, features$Frequency, 
     labels = features$Label, 
     cex = 0.9, offset = 10)

screen(3); dotchart(Study1.ideal.points$Wordfish[Study1.ideal.points.order], 
                    labels = Study1.ideal.points$Newspaper[Study1.ideal.points.order], 
                    pch = 19, xlim = c(-1.7, 1.7), cex = 0.96, 
                    xlab = "Estimated Ideal Points")
title("(b) Estimation results of the ideal points \non the security bills from the Wordfish model")
segments(Study1.ideal.points$Wordfish[Study1.ideal.points.order] - 
           Study1.ideal.points$SE[Study1.ideal.points.order] * 1.96, 1:10, 
         Study1.ideal.points$Wordfish[Study1.ideal.points.order] + 
           Study1.ideal.points$SE[Study1.ideal.points.order] * 1.96, 1:10)

screen(4); dotchart(humancoded.positions$Mean[humancoded.positions.order], 
                    labels = humancoded.positions$Newspaper[humancoded.positions.order], 
                    pch = 19, xlim = c(1.8, 4.7), cex = 0.96, 
                    xlab = "Hand-Coded Positions")
title("(c) Hand-coded positions \non the security bills")
segments(humancoded.positions$Lower[humancoded.positions.order], 1:10, 
         humancoded.positions$Upper[humancoded.positions.order], 1:10)
dev.off()